import json
from collections import Counter

import jieba
import jieba.analyse
import pandas as pd

# Load the CSV; its 'edu' and 'need' columns are read below.
data_need = pd.read_csv('boss1.csv')

# Configure jieba with the user dictionary and the IDF table used by
# jieba.analyse.extract_tags.
jieba.load_userdict(r'C:\Users\apple\Desktop\1123.txt')
jieba.analyse.set_idf_path(r'C:\Users\apple\Desktop\userdict.txt.big')

# Keyword pools, one per 'edu' value: a -> '大专', b -> '本科', c -> '硕士'.
a = []
b = []
c = []
keywords_by_edu = {'大专': a, '本科': b, '硕士': c}

# Walk both columns in lockstep so each 'need' text is paired with the 'edu'
# value of the same row by position, without a hand-maintained row counter
# being used as an index label.
for edu, need in zip(data_need['edu'], data_need['need']):
    bucket = keywords_by_edu.get(edu)
    if bucket is None:
        # Rows with any other 'edu' value contribute to none of the pools.
        continue
    # Top 5 keywords of this row's 'need' text.
    bucket.extend(
        jieba.analyse.extract_tags(str(need), topK=5, withWeight=False, allowPOS=())
    )

# Reduce each pool to its 30 most frequent keywords as (word, count) pairs.
a = Counter(a).most_common(30)
b = Counter(b).most_common(30)
c = Counter(c).most_common(30)

# Pool the top-5 keywords of every 'need' entry, across all rows.
x_w = [
    tag
    for requirement in data_need['need']
    for tag in jieba.analyse.extract_tags(
        str(requirement), topK=5, withWeight=False, allowPOS=()
    )
]

# The 30 most frequent pooled keywords as (word, count) pairs, and the
# words alone in the same order.
x_word = Counter(x_w).most_common(30)
x_words = [word for word, _count in x_word]

def _counts_for(words, ranked):
    """Return one count per entry of *words*, in the same order.

    *ranked* is an iterable of ``(word, count)`` pairs. A word that has no
    pair in *ranked* gets a count of 0, so the result always has exactly
    ``len(words)`` elements and stays index-aligned with *words*.
    """
    lookup = dict(ranked)
    return [lookup.get(word, 0) for word in words]


# Per-group counts aligned with x_words: position i of each list is the
# count of x_words[i] within that group's (word, count) ranking, or 0 when
# the word does not appear in that ranking.
keya = _counts_for(x_words, a)
keyb = _counts_for(x_words, b)
keyc = _counts_for(x_words, c)


# Bundle the keyword labels with the three count series that are aligned
# to them.
jsonData = {
    'x_words': x_words,
    'keya': keya,
    'keyb': keyb,
    'keyc': keyc,
}

# JSON text of the bundle; requires the standard-library json module to be
# imported at the top of the file.
j = json.dumps(jsonData)